{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c205056b-6f97-4266-9e53-9c14472c96bf",
   "metadata": {},
   "source": [
    "## 示例选择器\n",
    "- 根据长度要求智能选择示例\n",
    "- 根据输入相似度选择示例（最大边际相关性）\n",
    "- 根据输入相似度选择示例（最大余弦相似度）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "3bb4959f-0e44-4ebf-b4b5-9c0dc97aa24c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "给出每个输入词的反义词\n",
      "\n",
      "原词:happy\n",
      "反义:sad\n",
      "\n",
      "原词:tall\n",
      "反义:short\n",
      "\n",
      "原词:sunny\n",
      "反义:gloomy\n",
      "\n",
      "原词:windy\n",
      "反义:calm\n",
      "\n",
      "原词:高兴\n",
      "反义:悲伤\n",
      "\n",
      "原词:big\n",
      "反义\n"
     ]
    }
   ],
   "source": [
    "# 根据输入的提示词长度综合计算最终长度,智能截取或者添加提示词的示例\n",
    "from langchain.prompts import PromptTemplate\n",
    "from langchain.prompts import FewShotPromptTemplate\n",
    "from langchain.prompts.example_selector import LengthBasedExampleSelector\n",
    "\n",
    "# 假设已经有这么多的提示词示例组:\n",
    "examples = [\n",
    "    {\"input\":\"happy\",\"output\":\"sad\"},\n",
    "    {\"input\":\"tall\",\"output\":\"short\"},\n",
    "    {\"input\":\"sunny\",\"output\":\"gloomy\"},\n",
    "    {\"input\":\"windy\",\"output\":\"calm\"},\n",
    "    {\"input\":\"高兴\",\"output\":\"悲伤\"}\n",
    "]\n",
    "\n",
    "# 构造提示词模板\n",
    "example_prompt = PromptTemplate(\n",
    "    input_variables=[\"input\", \"output\"],\n",
    "    template=\"原词:{input}\\n反义:{output}\"\n",
    ")\n",
    "\n",
    "# 调用长度示例选择器\n",
    "example_selector = LengthBasedExampleSelector(\n",
    "    # 传入提示词示例组\n",
    "    examples=examples,\n",
    "    # 传入提示词模板\n",
    "    example_prompt=example_prompt,\n",
    "    # 设置格式化后的提示词最大长度\n",
    "    max_length=25,\n",
    "    # 内置的get_text_length,如果默认分词计算方式不满足,可以自己扩展\n",
    "    # get_text_length:Callable[[str],int] = lambda x:Len(re.split(\"\\n|\",x))\n",
    ")\n",
    "\n",
    "# 使用小样本提示词模版来实现动态示例的调用\n",
    "dynamic_prompt = FewShotPromptTemplate(\n",
    "    example_selector=example_selector,\n",
    "    example_prompt=example_prompt,\n",
    "    prefix=\"给出每个输入词的反义词\",\n",
    "    suffix=\"原词:{adjective}\\n反义\",\n",
    "    input_variables=[\"adjective\"]\n",
    ")\n",
    "\n",
    "print(dynamic_prompt.format(adjective=\"big\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "7740d35c-8a34-44af-8d13-1b07737aa311",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "给出每个输入词的反义词\n",
      "\n",
      "原词:happy\n",
      "反义:sad\n",
      "\n",
      "原词:tall\n",
      "反义:short\n",
      "\n",
      "原词:big and huge and massive and large and gigantic and tall and much much much much much much bigger then everyone\n",
      "反义\n"
     ]
    }
   ],
   "source": [
    "# 如果输入长度很长，则最终输出会根据长度要求减少\n",
    "long_string = \"big and huge and massive and large and gigantic and tall and much much much much much much bigger then everyone\"\n",
    "print(dynamic_prompt.format(adjective=long_string))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1a2101a5-d446-4779-b070-d396064a8601",
   "metadata": {},
   "source": [
    "### 根据输入相似度选择示例(最大边际相关性)\n",
    "- MMR是一种在信息检索中常用的方法，它的目标是在相关性和多样性之间找到一个平衡\n",
    "- MMR会首先找出与输入最相似（即余弦相似度最大）的样本\n",
    "- 然后在迭代添加样本的过程中，对于与已选择样本过于接近（即相似度过高）的样本进行惩罚\n",
    "- MMR既能确保选出的样本与输入高度相关，又能保证选出的样本之间有足够的多样性\n",
    "- 关注如何在相关性和多样性之间找到一个平衡"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e492250f-1cc0-4bc9-8432-68d75a862949",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install tiktoken\n",
    "!pip install faiss-cpu"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d6ee0d19-772d-4753-bbd2-5c0f398f678a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 使用MMR来检索相关示例，以使示例尽量符合输入\n",
    "from langchain.prompts.example_selector import MaxMarginalRelevanceExampleSelector\n",
    "from langchain.vectorstores import FAISS\n",
    "from langchain_openai  import OpenAIEmbeddings\n",
    "from langchain.prompts import FewShotPromptTemplate,PromptTemplate\n",
    "import os\n",
    "\n",
    "os.environ[\"OPENAI_API_KEY\"] = \"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\"\n",
    "os.environ[\"OPENAI_PROXY\"] = \"https://ai-yyds.com/v1\"\n",
    "\n",
    "api_base = os.getenv(\"OPENAI_PROXY\")\n",
    "api_key = os.getenv(\"OPENAI_API_KEY\")\n",
    "\n",
    "# 假设已经有这么多的提示词示例组：\n",
    "examples = [\n",
    "    {\"input\":\"happy\",\"output\":\"sad\"},\n",
    "    {\"input\":\"tall\",\"output\":\"short\"},\n",
    "    {\"input\":\"sunny\",\"output\":\"gloomy\"},\n",
    "    {\"input\":\"windy\",\"output\":\"calm\"},\n",
    "    {\"input\":\"高兴\",\"output\":\"悲伤\"}\n",
    "]\n",
    "\n",
    "# 构造提示词模版\n",
    "example_prompt = PromptTemplate(\n",
    "    input_variables=[\"input\", \"output\"],\n",
    "    template=\"原词:{input}\\n反义:{output}\"\n",
    ")\n",
    "\n",
    "# 调用MMR\n",
    "example_selector = MaxMarginalRelevanceExampleSelector.from_examples(\n",
    "    #传入示例组\n",
    "    examples,\n",
    "    #使用openai的嵌入来做相似性搜索\n",
    "    OpenAIEmbeddings(openai_api_base=api_base,openai_api_key=api_key),\n",
    "    #设置使用的向量数据库是什么\n",
    "    FAISS,\n",
    "    #结果条数\n",
    "    k=2,\n",
    ")\n",
    "\n",
    "#使用小样本模版\n",
    "mmr_prompt = FewShotPromptTemplate(\n",
    "    example_selector=example_selector,\n",
    "    example_prompt=example_prompt,\n",
    "    prefix=\"给出每个输入词的反义词\",\n",
    "    suffix=\"原词：{adjective}\\n反义：\",\n",
    "    input_variables=[\"adjective\"]\n",
    ")\n",
    "\n",
    "# 当输入一个描述情绪的词语的时候，应该选择同样是描述情绪的一对示例组来填充提示词模版\n",
    "print(mmr_prompt.format(adjective=\"难过\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "35890257-b339-4380-b81d-6c2d27c3b66c",
   "metadata": {},
   "source": [
    "### 根据输入相似度选择示例(最大余弦相似度)\n",
    "- 一种常见的相似度计算方法\n",
    "- 它通过计算两个向量（在这里，向量可以代表文本、句子或词语）之间的余弦值来衡量它们的相似度\n",
    "- 余弦值越接近1，表示两个向量越相似\n",
    "- 主要关注的是如何准确衡量两个向量的相似度"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0aaaa29b-0a47-42b0-b967-2e50c27fd766",
   "metadata": {},
   "source": [
    "! pip install chromadb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "6c8bbce4-cd03-4c58-b71b-6e0f2d7f4106",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.prompts.example_selector import SemanticSimilarityExampleSelector\n",
    "from langchain.vectorstores import Chroma\n",
    "from langchain_openai  import OpenAIEmbeddings\n",
    "from langchain.prompts import FewShotPromptTemplate, PromptTemplate\n",
    "import os\n",
    "\n",
    "api_base = os.getenv(\"OPENAI_PROXY\")\n",
    "api_key = os.getenv(\"OPENAI_API_KEY\")\n",
    "\n",
    "# 假设已经有这么多的提示词示例组：\n",
    "examples = [\n",
    "    {\"input\":\"happy\",\"output\":\"sad\"},\n",
    "    {\"input\":\"tall\",\"output\":\"short\"},\n",
    "    {\"input\":\"sunny\",\"output\":\"gloomy\"},\n",
    "    {\"input\":\"windy\",\"output\":\"calm\"},\n",
    "    {\"input\":\"高兴\",\"output\":\"悲伤\"}\n",
    "]\n",
    "\n",
    "# 构造提示词模版\n",
    "example_prompt = PromptTemplate(\n",
    "    input_variables=[\"input\", \"output\"],\n",
    "    template=\"原词:{input}\\n反义:{output}\"\n",
    ")\n",
    "\n",
    "# 调用相似度选择器\n",
    "example_selector = SemanticSimilarityExampleSelector.from_examples(\n",
    "    #传入示例组\n",
    "    examples,\n",
    "    #使用openai的嵌入来做相似性搜索\n",
    "    OpenAIEmbeddings(openai_api_base=api_base,openai_api_key=api_key),\n",
    "    #设置使用的向量数据库是什么\n",
    "    Chroma,\n",
    "    #结果条数\n",
    "    k=1,\n",
    ")\n",
    "\n",
    "#使用小样本模版\n",
    "mmr_prompt = FewShotPromptTemplate(\n",
    "    example_selector=example_selector,\n",
    "    example_prompt=example_prompt,\n",
    "    prefix=\"给出每个输入词的反义词\",\n",
    "    suffix=\"原词：{adjective}\\n反义：\",\n",
    "    input_variables=[\"adjective\"]\n",
    ")\n",
    "\n",
    "# 输入一个形容感觉的词语，应该查找近似的 happy/sad 示例\n",
    "print(similar_prompt.format(adjective=\"worried\"))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (langchain)",
   "language": "python",
   "name": "langchain-env"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
